suppressPackageStartupMessages({
library(tidyverse)
library(Seurat)
library(kableExtra)
})
load("data/20250620-seurat_integrated_UMAP_nFeat-ScTypeannotated.Rdata")7 Find Markers
Finds markers (differentially expressed genes) for each of the UMAP blobs
7.1 UMAP
Comparison of UMAPs previously annotated using ScType and clustered with Seurat.
d1 <- DimPlot(alldata, label = TRUE, repel = T, label.size = 5) +
NoLegend()
Idents(alldata) <- alldata$sctype_classification
d2 <- DimPlot(alldata, label = TRUE, repel = T, label.size = 4,) +
NoLegend()
d1|d2all_markers <- markers <- FindAllMarkers(alldata, only.pos = TRUE, densify = T)
# write_csv(all_markers, file = "data/20250728-all_markers.csv")7.2 Top marker genes per cluster
7.2.1 Dot plot
Dot plot showing the scaled expression of the top five marker genes per cluster in the integrated dataset. Clusters correspond to Seurat-defined groups (seurat_clusters) and are ordered according to their assigned cell type (sctype_classification). The size of each dot represents the percentage of cells within a cluster expressing the gene, while the color intensity indicates the average expression level. Cluster labels (X-axis) are color-coded by sctype_classification to visually group clusters belonging to the same cell type.
markers <- read_csv("data/20250728-all_markers.csv")
top5 <- markers |>
filter(pct.1>0.3) |>
group_by(cluster) |>
top_n(5, avg_log2FC)
# Idents set to seurat_cluster
Idents(alldata) <- alldata$seurat_clusters
# Map seurat_clusters to sctype_classification
label_map <- alldata@meta.data |>
select(seurat_clusters, sctype_classification) |>
distinct()
# Reorder seurat_cluster based on sctype_classification
seurat_clusters_ordered <- label_map |>
arrange(sctype_classification) |>
pull(seurat_clusters)
# Generate distinct colors automatically (1 color per sctype_classification)
n_classes <- length(unique(label_map$sctype_classification))
auto_colors <- setNames(
scales::hue_pal()(n_classes),
unique(label_map$sctype_classification)
)
# Make the DotPlot
Idents(alldata) = factor(alldata$seurat_clusters,
levels = seurat_clusters_ordered)
DotPlot(alldata, features = top5$gene) +
coord_flip()+
# Apply color to axis text
theme(
axis.text.x = element_text(
size=14,
colour = setNames(
auto_colors[label_map$sctype_classification],
label_map$seurat_cluster
)[levels(Idents(alldata))]
)
)7.2.2 List of the top five marker genes for each Seurat cluster
kbl(top5) |>
kable_styling("striped") |>
scroll_box(height = "1000px")| p_val | avg_log2FC | pct.1 | pct.2 | p_val_adj | cluster | gene |
|---|---|---|---|---|---|---|
| 0 | 5.383311 | 0.960 | 0.331 | 0.00e+00 | 0 | Bcl2a1b |
| 0 | 4.897816 | 0.695 | 0.129 | 0.00e+00 | 0 | Mir155hg |
| 0 | 6.163070 | 0.748 | 0.331 | 0.00e+00 | 0 | Osbpl8 |
| 0 | 5.619881 | 0.678 | 0.325 | 0.00e+00 | 0 | Whrn |
| 0 | 6.029228 | 0.711 | 0.361 | 0.00e+00 | 0 | Pdgfb |
| 0 | 6.011845 | 0.906 | 0.396 | 0.00e+00 | 1 | Entpd1 |
| 0 | 7.829072 | 0.841 | 0.393 | 0.00e+00 | 1 | Mertk |
| 0 | 6.109155 | 0.667 | 0.250 | 0.00e+00 | 1 | Adrb2 |
| 0 | 6.177441 | 0.771 | 0.366 | 0.00e+00 | 1 | Fchsd2 |
| 0 | 6.543904 | 0.737 | 0.346 | 0.00e+00 | 1 | Tbc1d9 |
| 0 | 3.940018 | 0.959 | 0.427 | 0.00e+00 | 2 | Serpine2 |
| 0 | 4.273323 | 0.677 | 0.214 | 0.00e+00 | 2 | Baiap2l2 |
| 0 | 3.120491 | 0.668 | 0.370 | 0.00e+00 | 2 | P4ha1 |
| 0 | 3.465477 | 0.596 | 0.332 | 0.00e+00 | 2 | Pfkfb3 |
| 0 | 5.673301 | 0.535 | 0.266 | 0.00e+00 | 2 | Rab7b |
| 0 | 6.686134 | 0.845 | 0.079 | 0.00e+00 | 3 | Gm42756 |
| 0 | 6.653809 | 0.824 | 0.107 | 0.00e+00 | 3 | Sox10 |
| 0 | 6.686665 | 0.846 | 0.197 | 0.00e+00 | 3 | Usp54 |
| 0 | 6.984864 | 0.751 | 0.124 | 0.00e+00 | 3 | Vldlr |
| 0 | 6.891515 | 0.601 | 0.186 | 0.00e+00 | 3 | 5033421B08Rik |
| 0 | 6.586443 | 0.962 | 0.292 | 0.00e+00 | 4 | Gm10790 |
| 0 | 8.725423 | 0.853 | 0.202 | 0.00e+00 | 4 | 8030442B05Rik |
| 0 | 8.813212 | 0.989 | 0.383 | 0.00e+00 | 4 | P2ry12 |
| 0 | 8.095441 | 0.823 | 0.243 | 0.00e+00 | 4 | A830008E24Rik |
| 0 | 6.336412 | 0.917 | 0.356 | 0.00e+00 | 4 | 4933406I18Rik |
| 0 | 12.962007 | 0.967 | 0.065 | 0.00e+00 | 5 | Ocln |
| 0 | 13.197439 | 0.884 | 0.015 | 0.00e+00 | 5 | Ushbp1 |
| 0 | 13.078963 | 0.859 | 0.014 | 0.00e+00 | 5 | Hspa12b |
| 0 | 13.569486 | 0.929 | 0.089 | 0.00e+00 | 5 | Thsd1 |
| 0 | 12.519384 | 0.891 | 0.285 | 0.00e+00 | 5 | Fut8 |
| 0 | 5.305894 | 0.852 | 0.187 | 0.00e+00 | 6 | Lgals3 |
| 0 | 7.534143 | 0.965 | 0.410 | 0.00e+00 | 6 | Ftl1 |
| 0 | 6.665051 | 0.691 | 0.275 | 0.00e+00 | 6 | Itga5 |
| 0 | 5.344663 | 0.687 | 0.286 | 0.00e+00 | 6 | Fabp5 |
| 0 | 9.585140 | 0.532 | 0.310 | 0.00e+00 | 6 | Fth1 |
| 0 | 8.127567 | 0.982 | 0.106 | 0.00e+00 | 7 | Anln |
| 0 | 8.686365 | 0.881 | 0.140 | 0.00e+00 | 7 | Ppp2r2c |
| 0 | 8.975226 | 0.910 | 0.190 | 0.00e+00 | 7 | Sccpdh |
| 0 | 8.860418 | 0.911 | 0.206 | 0.00e+00 | 7 | Eml2 |
| 0 | 13.274086 | 0.808 | 0.253 | 0.00e+00 | 7 | Plin3 |
| 0 | 6.084166 | 0.859 | 0.433 | 0.00e+00 | 8 | Runx1 |
| 0 | 9.114216 | 0.887 | 0.495 | 0.00e+00 | 8 | Basp1 |
| 0 | 6.960778 | 0.758 | 0.389 | 0.00e+00 | 8 | Mir142hg |
| 0 | 6.221935 | 0.527 | 0.308 | 0.00e+00 | 8 | Creb5 |
| 0 | 6.377049 | 0.540 | 0.335 | 0.00e+00 | 8 | Ifitm10 |
| 0 | 7.337465 | 0.858 | 0.089 | 0.00e+00 | 9 | Serpina3n |
| 0 | 7.531294 | 0.904 | 0.171 | 0.00e+00 | 9 | Tmod1 |
| 0 | 8.824104 | 0.864 | 0.171 | 0.00e+00 | 9 | Tspan15 |
| 0 | 10.654820 | 0.791 | 0.124 | 0.00e+00 | 9 | Sh3gl3 |
| 0 | 7.370502 | 0.558 | 0.049 | 0.00e+00 | 9 | Kit |
| 0 | 6.669409 | 0.984 | 0.076 | 0.00e+00 | 10 | H2-Aa |
| 0 | 6.107818 | 0.965 | 0.060 | 0.00e+00 | 10 | H2-Eb1 |
| 0 | 5.653954 | 0.980 | 0.176 | 0.00e+00 | 10 | H2-Ab1 |
| 0 | 6.099866 | 0.995 | 0.207 | 0.00e+00 | 10 | Cd74 |
| 0 | 6.207174 | 0.927 | 0.218 | 0.00e+00 | 10 | Itgax |
| 0 | 10.965876 | 0.855 | 0.010 | 0.00e+00 | 11 | B230323A14Rik |
| 0 | 11.208839 | 0.836 | 0.024 | 0.00e+00 | 11 | Sfrp5 |
| 0 | 10.121493 | 0.824 | 0.153 | 0.00e+00 | 11 | Fam228b |
| 0 | 11.441386 | 0.821 | 0.170 | 0.00e+00 | 11 | Rgma |
| 0 | 12.556664 | 0.696 | 0.474 | 0.00e+00 | 11 | Sparc |
| 0 | 4.613013 | 0.755 | 0.234 | 0.00e+00 | 12 | Gm15155 |
| 0 | 6.292254 | 0.973 | 0.466 | 0.00e+00 | 12 | Plxdc2 |
| 0 | 6.879256 | 0.858 | 0.407 | 0.00e+00 | 12 | Nrp1 |
| 0 | 4.688339 | 0.826 | 0.437 | 0.00e+00 | 12 | Gpr34 |
| 0 | 4.622451 | 0.332 | 0.301 | 0.00e+00 | 12 | Plcl1 |
| 0 | 16.714419 | 0.975 | 0.036 | 0.00e+00 | 13 | Gldc |
| 0 | 14.989121 | 0.934 | 0.039 | 0.00e+00 | 13 | Grin2c |
| 0 | 11.897805 | 0.929 | 0.045 | 0.00e+00 | 13 | St6galnac5 |
| 0 | 14.997302 | 0.884 | 0.087 | 0.00e+00 | 13 | 4930488L21Rik |
| 0 | 12.983120 | 0.998 | 0.264 | 0.00e+00 | 13 | Tspan7 |
| 0 | 10.029174 | 0.914 | 0.122 | 0.00e+00 | 14 | Mx1 |
| 0 | 9.720563 | 0.848 | 0.120 | 0.00e+00 | 14 | A330040F15Rik |
| 0 | 9.021241 | 0.849 | 0.128 | 0.00e+00 | 14 | Rsad2 |
| 0 | 10.356806 | 0.988 | 0.285 | 0.00e+00 | 14 | Ifi204 |
| 0 | 10.424149 | 0.921 | 0.422 | 0.00e+00 | 14 | Pik3ap1 |
| 0 | 17.534114 | 0.983 | 0.003 | 0.00e+00 | 15 | Cxcr6 |
| 0 | 19.458819 | 0.973 | 0.008 | 0.00e+00 | 15 | Lck |
| 0 | 16.470059 | 0.984 | 0.020 | 0.00e+00 | 15 | Trac |
| 0 | 16.619246 | 0.958 | 0.002 | 0.00e+00 | 15 | Cd226 |
| 0 | 16.343388 | 0.948 | 0.025 | 0.00e+00 | 15 | Runx3 |
| 0 | 13.051094 | 0.994 | 0.015 | 0.00e+00 | 16 | Drc7 |
| 0 | 11.774016 | 0.991 | 0.025 | 0.00e+00 | 16 | Prr32 |
| 0 | 11.639716 | 0.998 | 0.034 | 0.00e+00 | 16 | Clic6 |
| 0 | 12.388454 | 0.998 | 0.203 | 0.00e+00 | 16 | Ezr |
| 0 | 12.746285 | 1.000 | 0.348 | 0.00e+00 | 16 | Cab39l |
| 0 | 12.246278 | 0.981 | 0.401 | 0.00e+00 | 17 | Syngr1 |
| 0 | 9.275217 | 0.994 | 0.438 | 0.00e+00 | 17 | Tyrobp |
| 0 | 10.485866 | 0.997 | 0.506 | 0.00e+00 | 17 | Rplp1 |
| 0 | 11.219707 | 0.991 | 0.501 | 0.00e+00 | 17 | Tmsb4x |
| 0 | 9.060232 | 0.990 | 0.529 | 0.00e+00 | 17 | Fcer1g |
| 0 | 18.534854 | 0.938 | 0.021 | 0.00e+00 | 18 | Ryr2 |
| 0 | 19.407485 | 0.906 | 0.015 | 0.00e+00 | 18 | Grin2a |
| 0 | 21.165791 | 0.914 | 0.389 | 0.00e+00 | 18 | Celf2 |
| 0 | 22.657484 | 0.762 | 0.009 | 0.00e+00 | 18 | A230006K03Rik |
| 0 | 21.655781 | 0.726 | 0.036 | 0.00e+00 | 18 | Sncb |
| 0 | 7.605571 | 0.992 | 0.030 | 0.00e+00 | 19 | Slc6a20a |
| 0 | 7.758149 | 0.994 | 0.054 | 0.00e+00 | 19 | Atp13a5 |
| 0 | 7.764430 | 0.995 | 0.068 | 0.00e+00 | 19 | Notch3 |
| 0 | 7.773113 | 1.000 | 0.074 | 0.00e+00 | 19 | Vtn |
| 0 | 8.832420 | 0.923 | 0.045 | 0.00e+00 | 19 | Aspn |
| 0 | 17.112431 | 0.985 | 0.018 | 0.00e+00 | 20 | Sp9 |
| 0 | 16.538981 | 0.956 | 0.034 | 0.00e+00 | 20 | Kcnb2 |
| 0 | 16.118414 | 0.976 | 0.306 | 0.00e+00 | 20 | Pbx3 |
| 0 | 15.010216 | 0.932 | 0.021 | 0.00e+00 | 20 | Epha6 |
| 0 | 16.698508 | 0.884 | 0.034 | 0.00e+00 | 20 | Dlx2 |
| 0 | 7.129027 | 0.822 | 0.194 | 0.00e+00 | 21 | Tmem178b |
| 0 | 7.083616 | 0.748 | 0.238 | 0.00e+00 | 21 | Nbas |
| 0 | 6.807112 | 0.701 | 0.160 | 0.00e+00 | 21 | Nkain1 |
| 0 | 6.915113 | 0.698 | 0.214 | 0.00e+00 | 21 | Arhgef28 |
| 0 | 10.566937 | 0.521 | 0.285 | 0.00e+00 | 21 | Nckap1 |
| 0 | 13.006318 | 0.907 | 0.012 | 0.00e+00 | 22 | F13a1 |
| 0 | 13.960084 | 0.960 | 0.231 | 0.00e+00 | 22 | Ms4a6c |
| 0 | 12.783361 | 0.886 | 0.061 | 0.00e+00 | 22 | Pf4 |
| 0 | 12.916844 | 0.803 | 0.247 | 0.00e+00 | 22 | Stab1 |
| 0 | 13.608312 | 0.753 | 0.353 | 0.00e+00 | 22 | Maf |
| 0 | 12.109861 | 1.000 | 0.064 | 0.00e+00 | 23 | Ndufa4l2 |
| 0 | 11.042306 | 1.000 | 0.070 | 0.00e+00 | 23 | Egflam |
| 0 | 11.110187 | 0.990 | 0.110 | 0.00e+00 | 23 | Arhgap42 |
| 0 | 13.643806 | 0.990 | 0.255 | 0.00e+00 | 23 | Ctdspl |
| 0 | 10.951412 | 0.969 | 0.329 | 0.00e+00 | 23 | Septin11 |
| 0 | 24.821567 | 1.000 | 0.167 | 0.00e+00 | 24 | Cfap141 |
| 0 | 27.457694 | 0.996 | 0.003 | 0.00e+00 | 24 | Sntn |
| 0 | 25.591092 | 0.991 | 0.027 | 0.00e+00 | 24 | Fam166b |
| 0 | 29.195085 | 0.991 | 0.084 | 0.00e+00 | 24 | Cdhr4 |
| 0 | 25.670129 | 0.957 | 0.024 | 0.00e+00 | 24 | Stoml3 |
| 0 | 13.385614 | 0.970 | 0.090 | 0.00e+00 | 25 | Pdgfra |
| 0 | 13.573818 | 1.000 | 0.272 | 0.00e+00 | 25 | Epn2 |
| 0 | 18.279946 | 0.970 | 0.290 | 0.00e+00 | 25 | Itga9 |
| 0 | 14.885678 | 0.827 | 0.508 | 0.00e+00 | 25 | Marcks |
| 0 | 12.627057 | 0.523 | 0.246 | 4.70e-06 | 25 | Agap1 |
| 0 | 11.521800 | 0.959 | 0.293 | 0.00e+00 | 26 | Tpm1 |
| 0 | 12.845471 | 0.942 | 0.302 | 0.00e+00 | 26 | Crim1 |
| 0 | 11.753684 | 0.884 | 0.075 | 0.00e+00 | 26 | Rasd1 |
| 0 | 13.341255 | 0.901 | 0.348 | 0.00e+00 | 26 | Rbpms |
| 0 | 14.338538 | 0.709 | 0.319 | 0.00e+00 | 26 | Slc38a2 |
| 0 | 8.995693 | 1.000 | 0.120 | 0.00e+00 | 27 | Cdca3 |
| 0 | 9.018390 | 1.000 | 0.162 | 0.00e+00 | 27 | Cdca8 |
| 0 | 8.773329 | 0.979 | 0.141 | 0.00e+00 | 27 | Cenpe |
| 0 | 8.967621 | 0.965 | 0.057 | 0.00e+00 | 27 | Hmmr |
| 0 | 9.105028 | 0.896 | 0.108 | 0.00e+00 | 27 | Aspm |
| 0 | 9.776108 | 1.000 | 0.197 | 0.00e+00 | 28 | Grid2 |
| 0 | 10.209713 | 1.000 | 0.240 | 0.00e+00 | 28 | Ust |
| 0 | 9.873548 | 1.000 | 0.300 | 0.00e+00 | 28 | Pals2 |
| 0 | 11.913883 | 0.993 | 0.277 | 0.00e+00 | 28 | Ank2 |
| 0 | 9.693226 | 1.000 | 0.091 | 0.00e+00 | 28 | Cap2 |
| 0 | 11.310231 | 1.000 | 0.051 | 0.00e+00 | 29 | Otx2os1 |
| 0 | 9.001266 | 1.000 | 0.186 | 0.00e+00 | 29 | Pip5k1b |
| 0 | 8.989740 | 0.990 | 0.040 | 0.00e+00 | 29 | Kcne2 |
| 0 | 10.779732 | 0.990 | 0.230 | 0.00e+00 | 29 | Sh3d19 |
| 0 | 11.824433 | 0.958 | 0.037 | 0.00e+00 | 29 | Atp2b3 |
| 0 | 7.039520 | 0.878 | 0.080 | 0.00e+00 | 30 | Fgfr3 |
| 0 | 6.968599 | 0.796 | 0.064 | 0.00e+00 | 30 | Agt |
| 0 | 7.973467 | 0.816 | 0.189 | 0.00e+00 | 30 | Rhou |
| 0 | 6.571570 | 0.633 | 0.184 | 3.38e-05 | 30 | Vegfa |
| 0 | 7.359981 | 0.633 | 0.089 | 6.51e-05 | 30 | Gabra2 |
7.2.3 Upset chart
The UpSet chart visualizes the overlaps in marker genes between the different Seurat clusters identified by FindAllMarkers (~Venn diagram for many sets).
- Horizontal bars on the left show the total number of unique marker genes for each individual cluster.
- Vertical bars at the top quantify the size of each intersection—that is, how many genes are shared by the specific combination of clusters highlighted by the connected black dots below.
- Single black dots (no connecting lines) indicate marker genes unique to a single cluster.
- Multiple connected dots indicate sets of clusters that share the same marker genes; the connecting lines trace which clusters are part of the intersection.
- A taller vertical bar means a larger number of genes are shared in that particular combination.
- Conversely, shorter bars correspond to smaller overlaps.
In biological terms,
- Large overlaps between clusters may indicate closely related cell states or subtypes—perhaps reflecting transitional phenotypes, activation states, or technical resolution limits of clustering.
- Small or absent overlaps suggest distinct transcriptional programs, supporting the idea that the clusters represent separate cell types or states.
library(UpSetR)
all_markers <- read_csv("/Users/arbones/Dropbox/SyncBriefcase/LAB/UK/scRNA_Akhil/Analysis/Seurat/data/20250728-all_markers.csv")
mark <- markers |>
group_by(cluster) |>
summarise(genes = list(unique(gene)))
# Convert to a named list
mark_list <- setNames(mark$genes, mark$cluster)
upset(fromList(mark_list),order.by = "freq", keep.order = T,
nsets = 31, nintersects = 40,
mainbar.y.label = "Common genes\nbetween clusters",
sets.x.label = "Gene Markers per cluster",
point.size = 3,
mb.ratio = c(0.4,0.6),
text.scale = 1.8)7.2.4 Violin plots
Normalized expression values from the SCT assay in your Seurat object for the marker genes
The SCT assay stores log-normalized residual expression values from SCTransform. Values are continuous and typically centered around 0
library(gtable)
library(grid)
library(ggtext)
library(ggtext) # needed for colored axis labels
genes_of_interest <- c("P2ry12","Trem2","Clec7a","Mrc1","Cd3e","sct_Cd8a",
"Ptprc","sct_Aldh1l1","Aldoc","Mog",
"Olig2","Pdgfra","Ttr","Ccdc153","sct_Slc47a1","Flt1","Emcn",
"sct_Col1a2","sct_Bgn","Vtn","Kcnj8","Dcx","Snap25","Rbfox3")
violin_data <- FetchData(alldata, c(genes_of_interest,"seurat_clusters"), assay = "SCT")
violin_data_long <- violin_data |>
pivot_longer(-seurat_clusters) |>
mutate(seurat_clusters = fct_rev(seurat_clusters),
name = factor(name, levels = genes_of_interest))
# Get the default ggplot discrete colors
cluster_levels <- levels(violin_data_long$seurat_clusters)
palette_colors <- scales::hue_pal()(length(cluster_levels))
names(palette_colors) <- cluster_levels
# Create colored axis labels (HTML span with color)
axis_labels <- purrr::map2_chr(cluster_levels, palette_colors,
~sprintf("<span style='color:%s'>%s</span>", .y, .x))
p <- ggplot(violin_data_long, aes(x = seurat_clusters, y = value, fill = seurat_clusters)) +
geom_violin(aes(color = seurat_clusters),
scale = "width", trim = TRUE, adjust = 1, show.legend = FALSE) +
coord_flip() +
scale_fill_manual(values = palette_colors) +
scale_color_manual(values = palette_colors) +
facet_wrap(~name, scales = "free_x", nrow = 1) +
theme_minimal() +
labs (x = "Cluster", y = "Expression Levels")+
theme(
legend.position = "left",
axis.text.y = element_markdown(size = 12),
axis.text.x = element_text(size = 6),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
strip.text = element_text(size=11, face = "italic")
) +
scale_x_discrete(labels = axis_labels)
# Convert ggplot object to grob (graphical object)
g <- ggplotGrob(p)
# Find all the left axis grobs (y-axis elements)
axis_grobs <- which(grepl("axis-l", g$layout$name))
# Remove y-axis text from all but the first facet
invisible(
map(axis_grobs, function(i) {
# The first facet left axis is named 'axis-l-1-1'
if (g$layout[i, "name"] != "axis-l-1-1") {
g$grobs[[i]] <<- nullGrob()
}
})
)
# Draw the modified plot
grid.newpage()
grid.draw(g)